{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ef7d68c0-4630-47f5-abfc-d524c8b5bb4b",
   "metadata": {},
   "outputs": [],
   "source": [
    "using Pkg\n",
    "using JuMP\n",
    "using Ipopt\n",
    "using Distributions\n",
    "using Plots\n",
    "using Dierckx\n",
    "using CSV\n",
    "using DataFrames\n",
    "using KernelDensity\n",
    "using StatsBase\n",
    "using Roots\n",
    "using GaussianDistributions\n",
    "using Interpolations\n",
    "using LaTeXStrings\n",
    "using LinearAlgebra\n",
    "default(legendfontsize = 14, guidefont = (14, :black), tickfont = (14, :black))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88125b9f-18b9-4d2a-acc5-b2b37e87b423",
   "metadata": {},
   "outputs": [],
   "source": [
    "#set parameters\n",
    "#gm = cost function parameter\n",
    "#tau = progressivity of HSV taxes\n",
    "#lm = average level of HSV taxes\n",
    "gm = 4\n",
    "tau = 0.06\n",
    "lm = 0.91 * (53/100)^tau\n",
    "\n",
    "#load & symmetrize dataset\n",
    "#column 1, df2 = weight\n",
    "#column 2, df2 = spouse 1's earnings\n",
    "#column 3, df2 = spouse 2's earnings\n",
    "#column 4, df2 = total earnings\n",
    "#column 5, df2 = share of spouse 1's earnings\n",
    "#column 6, df2 = spouse 1's productivity implied by HSV taxes\n",
    "#column 7, df2 = spouse 2's productivity implied by HSV taxes\n",
    "df = CSV.read(\"../data/sample.csv\", DataFrame)\n",
    "sort!(df,[:pair])\n",
    "df1 = [df[:,1] df[:,3]/100000]\n",
    "numdf = size(df1)[1]\n",
    "for i=1:2:numdf\n",
    "    df1 = [df1; df1[i+1,:]'; df1[i,:]']\n",
    "end\n",
    "df1[:,1] = df1[:,1]/sum(df1[:,1])\n",
    "df2 = [df1[1:2:end,:] df1[2:2:end,:]]\n",
    "df2= [df2[:,1:2] df2[:,4]]\n",
    "df2 = [df2 df2[:,2]+df2[:,3] df2[:,2]./(df2[:,2]+df2[:,3])]\n",
    "df2 =[df2 ((1-tau)*lm)^(-1/gm) * (df2[:,4]).^(1-1/gm+tau/gm) .* (df2[:,5]).^(1-1/gm)]\n",
    "df2 =[df2 ((1-tau)*lm)^(-1/gm) * (df2[:,4]).^(1-1/gm+tau/gm) .* (1 .- df2[:,5]).^(1-1/gm)]\n",
    "\n",
    "#computate empirical moments for calibration\n",
    "#Exp = expectation of wi\n",
    "#Var = variance of wi\n",
    "#Gini = Gini of wi\n",
    "#Kendel_tau = Kendel's tau of w1 & w2\n",
    "#Pareto_stat = Pareto stat of wi at 99 percentile\n",
    "Exp = 2*sum(df2[:,6].*df2[:,1])\n",
    "Var =  2*sum(df2[:,6].^2 .*df2[:,1]) - (Exp)^2\n",
    "Gini = sum(abs.(df2[:,6] .- df2[:,6]').*(2*df2[:,1]*2*df2[:,1]'))/(2*Exp)\n",
    "Kendel_tau = sum(((df2[:,6] .- df2[:,6]').*(df2[:,7] .- df2[:,7]') .> 0).*(4*df2[:,1]*df2[:,1]')) - sum(((df2[:,6] .- df2[:,6]').*(df2[:,7] .- df2[:,7]') .< 0).*(4*df2[:,1]*df2[:,1]'))\n",
    "uniq_indw = unique(df2[:,6])\n",
    "uniq_indw = [uniq_indw [2*sum((df2[:,6] .<= w) .* df2[:,1]) for w in uniq_indw]]\n",
    "uniq_indw = uniq_indw[sortperm(uniq_indw[:, 1]), :]\n",
    "uniq_indw = [uniq_indw diff([0; uniq_indw[:,2]])]\n",
    "qpsw = 0.99\n",
    "idqw = findmin(abs.(uniq_indw[:,2] .- qpsw))[2]\n",
    "Pareto_stat = 1 + sum(uniq_indw[idqw,1])/(sum(uniq_indw[idqw:end,1].*uniq_indw[idqw:end,3])/sum(uniq_indw[idqw:end,3]) - sum(uniq_indw[idqw,1]))\n",
    "\n",
    "#calibrate structural parameters\n",
    "#aw = tail parameter of PLN cdf\n",
    "#sgw = shape parameter of PLN cdf\n",
    "#muw = location parameter of PLN cdf\n",
    "#rho_gauss = correlation parameter of Gaussian copula\n",
    "aw = Pareto_stat\n",
    "function cginidiff(sg)\n",
    "    g = 2*exp(aw*(aw-1)*sg^2)/(2*aw-1)*cdf(Normal(),(1-2*aw)*sg/sqrt(2))+2*cdf(Normal(),sg/sqrt(2))-1\n",
    "    return g - Gini\n",
    "end\n",
    "sgw = find_zero(cginidiff, (0,2), Bisection())\n",
    "muw = log(Exp*(aw-1)/aw*exp(-sgw^2/2))\n",
    "rho_gauss = sin(Kendel_tau*pi/2)\n",
    "\n",
    "#print all calibrated paramters\n",
    "println(\"aw >>> \",aw, \"muw >>> \", muw, \"sgw >>> \", sgw, \"rho_gauss >>> \", rho_gauss)\n",
    "\n",
    "#compute copulas\n",
    "#Cemp = empirical copula \n",
    "#Cgauss = calibrated Gaussian copula\n",
    "interpqy = Spline1D(uniq_indw[:,2], uniq_indw[:,1], k=1) #linear spline interpolater to approximate invidiaul productivities at given percentile \n",
    "qst = 0.005 #step of quantile grid\n",
    "qgr = qst:qst:1. #quantile grid\n",
    "qgrw = interpqy(qgr) #approxumation of productivities on the quantile grid \n",
    "Cemp = [sum((df2[:,6] .<= w1) .* (df2[:,7] .<= w2) .* df2[:,1]) * 2 for w1 in qgrw, w2 in qgrw] \n",
    "Cgauss = [cdf(Gaussian([0, 0], [1 rho_gauss; rho_gauss 1]), [quantile(Normal(), q1), quantile(Normal(), q2)]) for q1 in qgr, q2 in qgr]\n",
    "\n",
    "#compute densities and pareto weights\n",
    "#pdf1 = marginal density\n",
    "#alpha1 = normalized Pareto weights on singles\n",
    "#pdf2 = joint density\n",
    "#alpha2 = normalized Pareto weights on couples\n",
    "#set geometric grid of individual productivities\n",
    "wmin = 0.12;\n",
    "wmax = 10;\n",
    "nx = 400;\n",
    "wgr = [wmin*(wmax/wmin)^((i-1)/(nx-1)) for i=1:nx]\n",
    "delta = 1 .- (wgr[1:end-1] ./wgr[2:end]).^gm\n",
    "m = 0.35\n",
    "alphaf(w) = exp(-m*w^(gm/(gm-1))) - exp(-m*wmax^(gm/(gm-1)))\n",
    "function cdfpln(t,a,mu,sg)\n",
    "    return cdf(Normal(),(log(t)-mu)/sg)-exp(a*mu+a^2*sg^2/2)*t^(-a)*cdf(Normal(),(log(t)-mu-a*sg^2)/sg);\n",
    "end\n",
    "cdf1 = cdfpln.(wgr[1:end-1],aw,muw,sgw);\n",
    "pdf1 = diff([0; cdf1; 1])   \n",
    "alpha1 = alphaf.(wgr)\n",
    "cdf2 = [cdf(Gaussian([0, 0], [1 0.33; 0.33 1]), [quantile(Normal(), cdfpln(w1,aw,muw,sgw)), quantile(Normal(), cdfpln(w2,aw,muw,sgw))]) for w1 in wgr[1:end-1], w2 in wgr[1:end-1]]\n",
    "pdf2 = diff([zeros(1,nx); diff([zeros(nx,1) [cdf2 cdf1; cdf1' 1]], dims=2)], dims=1) \n",
    "alpha2 = alphaf.(wgr) .+ alphaf.(wgr)'\n",
    "alpha1 = alpha1/sum(alpha1.*pdf1); #normalization of alpha1\n",
    "alpha2 = alpha2/sum(alpha2.*pdf2); #normalization of alpha2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2dc7def-c362-4d94-b169-51a743f055d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 4.(a)\n",
    "#plot calibrated PLN and empirical cdf of individual productivities \n",
    "plot(uniq_indw[:,1], uniq_indw[:,2],line=(2.5,:red,:dash),label=\"empirical\", xlim = [0,2.5], xlabel = L\"productivity, $w$\", ylabel = \"\", legend =:bottomright)\n",
    "plot!(uniq_indw[:,1], cdfpln.(uniq_indw[:,1],aw,muw,sgw),line=(2.5,:blue),label=\"calibrated\")\n",
    "savefig(\"../output/fig4a_G.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d16860b8-89db-443e-9258-7db9928e0577",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 4.(b)\n",
    "#plot left tail of calibrated Gaussian copula and empirical copula\n",
    "nqgrw = length(qgrw)\n",
    "nbot = Int(round(1*nqgrw));\n",
    "ntop = Int(round(0*nqgrw))+1;\n",
    "xgr = quantile(Normal(),qgr);\n",
    "Axgr = [cdf(Gaussian([0, 0], [1 rho_gauss; rho_gauss 1]), [x, x]) for x in xgr];\n",
    "empdata = log.([2*sum((df2[:,6] .<= w).*df2[:,1]) for w in qgrw[1:nbot]]) ./ log.([sum((max.(df2[:,6],df2[:,7]) .<= w).*df2[:,1])*2 for w in qgrw[1:nbot]])\n",
    "\n",
    "plot(qgr[1:nbot-1], empdata[1:nbot-1], \n",
    "    legend =:none, ylim = [0.4,0.8],  label = \"\", line =(2.5,:red,:dash), xlabel=L\"quantile, $u$\", ylabel=\"\")\n",
    "plot!(qgr[1:nbot], log.(qgr[1:nbot])./log.(Axgr[1:nbot]),  label = \"\", line =(2.5,:blue))\n",
    "scatter!([0], [(1 + rho_gauss)/2], markersize=5, c=:black, label =\"\")\n",
    "plot!(qgr[ntop:end-1], (1 + rho_gauss)/2*ones(nqgrw-ntop), markersize=5, c=:black, label =\"\",  line =(1.0,:black,:dot))\n",
    "savefig(\"../output/fig4b_lefttail.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a667f61-56d9-4758-a0c9-29bc7199df72",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 4.(c)\n",
    "#plot Pareto weights on singles\n",
    "plot(wgr,alpha1,line=(2.5,:blue,:solid), xlim = [0,8], xlabel = L\"productivity, $w$\", ylabel = \"\", legend =:none)\n",
    "savefig(\"../output/fig4c_a1.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60fffe6d-ca47-40e0-90c0-9f4d49dad12b",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 4.(d)\n",
    "#plot calibrated Gaussian copula and empirical copula\n",
    "contour(qgr,qgr,Cemp, xlim = [0,1], ylim = [0,1], levels = 0.1:0.1:.9, line = (2.5, :red, :dash), cbar = true, xlabel = L\"quantile of spouse 1, $u_1$\", ylabel = L\"quantile of spouse 2, $u_2$\")\n",
    "contour!(qgr,qgr,Cgauss, xlim = [0,1], ylim = [0,1], levels = 0.1:0.1:0.9,  line = (2.5, :blue), label=\"calibrated copula\")\n",
    "plot!([],[],legend=:none, line = (2.5, :red, :dash), label=\"empirical copula\")\n",
    "plot!([],[],legend=:none, line = (2.5, :blue), label=\"calibrated copula\")\n",
    "savefig(\"../output/fig4d_copula.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a83cbeb2-a1ab-424b-a70e-e7298afa4fee",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 4.(e)\n",
    "#plot right tail of calibrated Gaussian copula and empirical copula\n",
    "nqgrw = length(qgrw)\n",
    "nbot = Int(round(1*nqgrw));\n",
    "ntop = Int(round(0*nqgrw))+1;\n",
    "xgr = quantile(Normal(),qgr);\n",
    "Axgr = [cdf(Gaussian([0, 0], [1 rho_gauss; rho_gauss 1]), [x, x]) for x in xgr];\n",
    "plot(qgr[ntop:end-1],log.([2*sum((df2[:,6] .>= w).*df2[:,1]) for w in qgrw[ntop:end-1]]) ./ log.([sum((min.(df2[:,6],df2[:,7]) .>= w).*df2[:,1])*2 for w in qgrw[ntop:end-1]]), ylim=[0.4,0.8], xlabel=L\"quantile, $u$\",  line =(2.5,:red,:dash), legend=:none, label=\"\")\n",
    "plot!(qgr[ntop:end-1], log.((1 .-qgr[ntop:end-1])) ./ log.((1 .+ Axgr[ntop:end-1] .- 2*qgr[ntop:end-1])), line =(2.5,:blue), label=\"\")\n",
    "scatter!([1], [(1 + rho_gauss)/2], markersize=5, c=:black, label =\"\")\n",
    "plot!(qgr[ntop:end-1], (1 + rho_gauss)/2*ones(nqgrw-ntop), markersize=5, c=:black, label =\"\",  line =(1.0,:black,:dot))\n",
    "savefig(\"../output/fig4e_righttail.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26908a60-af6f-463e-806c-16af39fa4160",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 4.(f)\n",
    "#plot Pareto weights on married\n",
    "contour(wgr,wgr,alpha2, xlim = [0,8], ylim = [0,8], levels = 0.1:0.1:1.2, line = (2.5, :blue, :solid), cbar = true, xlabel = L\"productivity of spouse 1, $w_1$\", ylabel = L\"productivity of spouse 2, $w_2$\")\n",
    "savefig(\"../output/fig4f_a2.pdf\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 1.8.5",
   "language": "julia",
   "name": "julia-1.8"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
